# Script to accompany Berghoff, R. (2022). Wh-Dependency Processing in a 
# Naturalistic Exposure Context: Sensitivity to Abstract Syntactic Structure in 
# High-Working-Memory L2 Speakers. Studies in Second Language Acquisition. 
####

# Session info ####
# R version 4.1.2
# [1] tidybayes_3.0.2 forcats_0.5.1   stringr_1.4.0   dplyr_1.0.7     purrr_0.3.4     readr_2.1.1    
# [7] tidyr_1.1.4     tibble_3.1.6    ggplot2_3.3.5   tidyverse_1.3.1 brms_2.16.3     Rcpp_1.0.7 

# Require packages (install them first if not already installed) ####
packages <- c("brms", "tidyverse", "tidybayes")
lapply(packages, library, character.only=TRUE)

# Question accuracy analyses ####
# Read in data ####
q_df <- read.csv("CMP-Acc-Data.csv")

# trialAnsCorr = lexical decision in cross-modal priming task 
# (i.e., alive/not alive)

# overall mean
q_df %>% 
  mutate(trialAnsCorr_new = ifelse(is.na(trialAnsCorr), 0, trialAnsCorr)) %>%
  # counts NA responses as incorrect
  summarize(meanDec = mean(trialAnsCorr_new, na.rm=TRUE))

# means per participant (last line calculates SD)
q_df %>% 
  mutate(trialAnsCorr_new = ifelse(is.na(trialAnsCorr), 0, trialAnsCorr)) %>% 
  group_by(participant) %>% 
  summarize(meanDec = mean(trialAnsCorr_new, na.rm=TRUE)) %>% 
  arrange(meanDec) %>% 
  #summarize(sdDec = sd(meanDec))
  
# qCorr = end-of-trial comprehension question
q_df %>% 
  filter(sentencep1!="Penguin-01.wav") %>% 
  # this question was problematic: no clear answer. removed from average calculation
  summarize(meanQCorr = mean(qCorr, na.rm=TRUE))

# means per participant (last line calculates SD)
q_df %>% 
  filter(sentencep1!="Penguin-01.wav") %>% 
  group_by(participant) %>% 
  summarize(meanQCorr = mean(qCorr, na.rm=TRUE)) %>% 
  arrange(meanQCorr) %>% 
  #summarize(sdQCorr = sd(meanQCorr))
  
# Reaction time analyses ####
# Read in data ####
rt_df <- read.csv("CMP-RT-Data.csv")

# mean RTs and SDs in Table 2 ####
# note: gap == 1 is the trace position
# note: gap == 4 is the control position

rt_df %>%
  filter(prop.score!=0 & AOAL2!=0) %>%
  group_by(match, gap) %>%
  summarize(meanRT = mean(RTtrial), sdRT = sd(RTtrial))

# Set prior, based on Roberts and Felser (2007) ####
prior <- c(set_prior("normal(-3,3)", class = "b", coef = "gap1"), 
           set_prior("normal(-27,27)", class = "b", coef = "match1"),   
           set_prior("normal(-2,2)", class = "b", coef = "match1:gap1"))

# Scaling and contrast coding ####
rt_df$prop.score_scale <- as.vector(scale(rt_df$prop.score))

rt_df$gap <- as.factor(rt_df$gap)
rt_df$gap <- fct_rev(rt_df$gap)
contrasts(rt_df$gap) <- c(-0.5, 0.5)

rt_df$match <- as.factor(rt_df$match)
contrasts(rt_df$match) <- c(-0.5, 0.5)

# Main model, output in Table 3 ####
bayes_mod_main_prior <- brm(RTtrial ~ prop.score_scale * match * gap 
                            + (1+match*gap|ITEM_clean) 
                            + (1+match*gap|PARTICIPANT), 
                            family = lognormal(), 
                            data = subset(rt_df, prop.score!=0 & AOAL2!=0), 
                            prior = prior, chains = 4, iter = 10000, 
                            warmup = 2000, sample_prior = TRUE, cores = 4)
bayes_mod_main_prior <- add_criterion(bayes_mod_main_prior, "loo")

# Calculation of bayes factors, main model ####
summary(bayes_mod_main_prior)
print(hypothesis(bayes_mod_main_prior, hypothesis = 'match1 < 0')) 
print(hypothesis(bayes_mod_main_prior, hypothesis = 'gap1 < 0')) 
print(hypothesis(bayes_mod_main_prior, hypothesis = 'match1:gap1 < 0')) 
print(hypothesis(bayes_mod_main_prior, hypothesis = 'prop.score_scale < 0')) 
print(hypothesis(bayes_mod_main_prior, hypothesis = 'prop.score_scale:match1 > 0')) 
print(hypothesis(bayes_mod_main_prior, hypothesis = 'prop.score_scale:gap1 > 0'))
print(hypothesis(bayes_mod_main_prior, hypothesis = 'prop.score_scale:match1:gap1 < 0')) 

# Model comparisons: should we add C-test score, L2 exposure or AoA? ####
rt_df$ctest_scale <- as.vector(scale(rt_df$CTEST))

bayes_mod_main_CTest <- brm(RTtrial ~ ctest_scale * prop.score_scale * match * gap 
                            + (1+match*gap|ITEM_clean) 
                            + (1+match*gap|PARTICIPANT), 
                            family = lognormal(), 
                            data = subset(rt_df, prop.score!=0 & AOAL2!=0), 
                            prior = prior, chains = 4, iter = 10000, 
                            warmup = 2000, sample_prior = TRUE, cores = 4)
bayes_mod_main_CTest <- add_criterion(bayes_mod_main_CTest, "loo")

rt_df$AOA_scale <- as.vector(scale(rt_df$AOAL2))

bayes_mod_main_AOA <- brm(RTtrial ~ AOA_scale * prop.score_scale * match * gap 
                            + (1+match*gap|ITEM_clean) 
                            + (1+match*gap|PARTICIPANT), 
                            family = lognormal(), 
                            data = subset(rt_df, prop.score!=0 & AOAL2!=0), 
                            prior = prior, chains = 4, iter = 10000, 
                            warmup = 2000, sample_prior = TRUE, cores = 4)
bayes_mod_main_AOA <- add_criterion(bayes_mod_main_AOA, "loo")

rt_df$L2EXP_scale <- as.vector(scale(rt_df$L2EXPOSURE))

bayes_mod_main_L2EXP <- brm(RTtrial ~ L2EXP_scale * prop.score_scale * match * gap 
                          + (1+match*gap|ITEM_clean) 
                          + (1+match*gap|PARTICIPANT), 
                          family = lognormal(), 
                          data = subset(rt_df, prop.score!=0 & AOAL2!=0), 
                          prior = prior, chains = 4, iter = 10000, 
                          warmup = 2000, sample_prior = TRUE, cores = 4)
bayes_mod_main_L2EXP <- add_criterion(bayes_mod_main_L2EXP, "loo")

loo_compare(bayes_mod_main_prior, bayes_mod_main_CTest, bayes_mod_main_AOA, bayes_mod_main_L2EXP)
# Interpretation: the top model (bayes_mod_main_prior) is the best one

# Median split on wm ####
rt_df %>%
  filter(prop.score!=0 & AOAL2!=0) %>%
  summarize(md_wm = median(prop.score))

# Md_wm = 0.557

rt_df_add <- rt_df %>%
  mutate(low_span = case_when(prop.score <= 0.557 ~ 1,
                              prop.score > 0.557 ~ 0))

# Mean RTs and SDs in Table 4 ####
rt_df_add %>%
  filter(prop.score!=0 & AOAL2!=0) %>%
  group_by(low_span, match, gap) %>%
  summarize(meanRT = mean(RTtrial), sdRT = sd(RTtrial))

rt_df_add$low_span <- as.factor(rt_df_add$low_span)

# Check AOA and exposure across high- and low-WM participants
rt_df_add %>%
  group_by(low_span) %>%
  summarize(mean_AOA = mean(AOAL2), mean_exp = mean(L2EXPOSURE))

rt_df_add %>%
  distinct(PARTICIPANT, .keep_all = TRUE) %>%
  summarize(pval = t.test(AOAL2 ~ low_span, var.equal = TRUE)$p.value)

rt_df_add %>%
  distinct(PARTICIPANT, .keep_all = TRUE) %>%
  summarize(pval = t.test(L2EXPOSURE ~ low_span, var.equal = TRUE)$p.value)

# Low-span participants ####
bayes_mod_lowSpan_prior <- brm(RTtrial ~ match * gap 
                               + (1+match*gap|ITEM_clean) 
                               + (1+match*gap|PARTICIPANT), 
                               family = lognormal(), 
                               data = subset(rt_df_add, low_span==1 & prop.score!=0 & AOAL2!=0), 
                               prior = prior, chains = 4, 
                               iter = 10000, warmup = 2000, 
                               sample_prior = TRUE, cores = 4)
summary(bayes_mod_lowSpan_prior)
print(hypothesis(bayes_mod_lowSpan_prior, hypothesis = 'match1 < 0')) 
print(hypothesis(bayes_mod_lowSpan_prior, hypothesis = 'gap1 < 0'))
print(hypothesis(bayes_mod_lowSpan_prior, hypothesis = 'match1:gap1 < 0')) 

# High-span participants ####
bayes_mod_highSpan_prior <- brm(RTtrial ~ match * gap 
                                + (1+match*gap|ITEM_clean) 
                                + (1+match*gap|PARTICIPANT), 
                                family = lognormal(), 
                                data = subset(rt_df_add, low_span==0 & prop.score!=0 & AOAL2!=0), 
                                prior = prior, chains = 4, iter = 10000, 
                                warmup = 2000, sample_prior = TRUE, cores = 4)
summary(bayes_mod_highSpan_prior)
print(hypothesis(bayes_mod_highSpan_prior, hypothesis = 'match1 < 0'))
print(hypothesis(bayes_mod_highSpan_prior, hypothesis = 'gap1 < 0')) 
print(hypothesis(bayes_mod_highSpan_prior, hypothesis = 'match1:gap1 < 0')) 

# Visualizing ####
# Prior samples: Figure A1 in Appendix ####
prior_Samples <- prior_samples(bayes_mod_highSpan_prior)

prior_Samples_gathered <- prior_Samples %>%
  gather(key = "effect", value = "value", 2:4)

facet_labels <- c("Position", "Target type", "Position x Target Type")
names(facet_labels) <- c("b_gap1", "b_match1", "b_match1:gap1")

prior_Samples_gathered %>%
  ggplot(aes(x = value)) +
  stat_halfeye() + 
  facet_wrap(~effect, scales="free", labeller = labeller(effect=facet_labels)) +
  theme_bw() + 
  ylab("Density") +
  xlab("") +
  geom_vline(xintercept=0.0, linetype="dotted")

# Posterior samples: High-span participants (Figure 2) ####
post_Samples <- posterior_samples(bayes_mod_highSpan_prior)

post_Samples_gathered <- post_Samples %>%
  gather(key = "effect", value = "value", 2:4)

facet_labels <- c("Position", "Target type", "Position x Target Type")
names(facet_labels) <- c("b_gap1", "b_match1", "b_match1:gap1")

post_Samples_gathered %>%
  ggplot(aes(x = value)) +
  stat_halfeye() + 
  facet_wrap(~effect, scales="free", labeller = labeller(effect=facet_labels)) +
  theme_bw() + 
  ylab("Density") +
  xlab("") +
  geom_vline(xintercept=0.0, linetype="dotted")

# Posterior samples: Low-span participants (Figure 1) ####
post_Samples_lowSpan <- posterior_samples(bayes_mod_lowSpan_prior)

post_Samples_gathered_lowSpan <- post_Samples_lowSpan %>%
  gather(key = "effect", value = "value", 2:4)

facet_labels <- c("Position", "Target type", "Position x Target Type")
names(facet_labels) <- c("b_gap1", "b_match1", "b_match1:gap1")

post_Samples_gathered_lowSpan %>%
  ggplot(aes(x = value)) +
  stat_halfeye() + 
  facet_wrap(~effect, scales="free", labeller = labeller(effect=facet_labels)) +
  theme_bw() + 
  ylab("Density") +
  xlab("") +
  geom_vline(xintercept=0.0, linetype="dotted")
